# To install packages, change eval=TRUE and run the following:
install.packages("tidyverse")
install.packages("tidycensus")
install.packages("readr")
install.packages("gridExtra")
install.packages("sf")
install.packages("tigris")
install.packages("janitor")
install.packages("mapview")
# Instructions for tidycensus:
# https://walker-data.com/tidycensus/articles/basic-usage.html
# Census code lists, definitions, accuracy:
# https://www.census.gov/programs-surveys/acs/technical-documentation/code-lists.html
# Census API variables:
# https://api.census.gov/data/2019/acs/acs1/variables.html
# https://api.census.gov/data/2010/dec/sf1/variables.html
# Load libraries
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.3 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidycensus)
library(readr)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
##
## Attaching package: 'tigris'
## The following object is masked from 'package:tidycensus':
##
## fips_codes
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(mapview)
# API Key:
api_key <- read_file("api.key")
census_api_key(api_key)
## To install your API key for use in future sessions, run this function with `install = TRUE`.
# Variables of interest:
total_population <- c("B01003_001E")
# Vacancy status variables:
vacancy_vars <- c("B25004_001E", "B25004_002E", "B25004_003E", "B25004_004E",
"B25004_005E", "B25004_006E", "B25004_007E", "B25004_008E",
"B25005_001E", "B25005_002E", "B25005_003E")
# Internet/Broadband variable:
# (there are more variables, check out the 2019 acs variables list)
internet_variables <- c("B28002_001E", "B28002_002E", "B28002_003E",
"B28002_004E", "B28002_005E", "B28002_006E",
"B28002_007E", "B28002_008E", "B28002_009E",
"B28002_010E", "B28002_011E", "B28002_012E",
"B28002_013E")
internet_variables_noE <- substr(internet_variables, 1, nchar(internet_variables)-1)
# Variables:
# all_vars <- c(total_population, vacancy_vars, internet_variables)
all_vars <- c(total_population, internet_variables)
# Get variable descriptions:
variable_descriptions <- load_variables(2019, "acs5")
# Get a dataset:
the_data <- get_acs(geography="block group", year=2019, state="MA",
variables=all_vars, geometry=TRUE)
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
##
|
| | 0%
|
|= | 1%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|=========== | 16%
|
|============== | 19%
|
|============== | 21%
|
|=============== | 21%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|===================== | 29%
|
|===================== | 30%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 35%
|
|========================= | 35%
|
|========================== | 37%
|
|=========================== | 39%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 46%
|
|================================== | 48%
|
|=================================== | 49%
|
|==================================== | 51%
|
|===================================== | 53%
|
|====================================== | 55%
|
|======================================== | 57%
|
|========================================= | 59%
|
|========================================== | 60%
|
|============================================ | 62%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================== | 66%
|
|=============================================== | 68%
|
|================================================= | 70%
|
|================================================== | 71%
|
|=================================================== | 73%
|
|==================================================== | 75%
|
|======================================================= | 78%
|
|======================================================== | 80%
|
|=========================================================== | 84%
|
|============================================================ | 86%
|
|============================================================== | 89%
|
|================================================================ | 91%
|
|================================================================== | 94%
|
|=================================================================== | 96%
|
|======================================================================| 100%
data_against_summary <- get_acs(geography="block group", year=2019, state="MA",
variables=all_vars, geometry=TRUE, summary_var="B01003_001")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
# Only for Suffolk county:
data_against_summary_suffolk <- get_acs(geography="block group", year=2019, state="MA", county="Suffolk",
variables=all_vars, geometry=TRUE, summary_var="B01003_001")
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
# Divide estimates by summary:
data_against_summary$percent <- (data_against_summary$estimate / data_against_summary$summary_est) * 100
data_against_summary_suffolk$percent <- (data_against_summary_suffolk$estimate / data_against_summary_suffolk$summary_est) * 100
# Save to csv:
write.csv(the_data, "2019acs_internet_vacancy.csv")
write.csv(data_against_summary, "2019acs_internet_vacancy_against_summary.csv")
# Merge data with dataset descriptions:
data_against_summary2 <- merge(data_against_summary, variable_descriptions, by.x="variable", by.y="name", all.x=TRUE)
# Get population by county:
county_population <- get_acs(geography="county", year=2019, state="MA", variables=total_population)
## Getting data from the 2015-2019 5-year ACS
# Get counties:
county_vector <- county_population$NAME
county_vector <- substr(county_vector, 1, nchar(county_vector)-15)
county_vector
## [1] "Barnstable County" "Berkshire County" "Bristol County"
## [4] "Dukes County" "Essex County" "Franklin County"
## [7] "Hampden County" "Hampshire County" "Middlesex County"
## [10] "Nantucket County" "Norfolk County" "Plymouth County"
## [13] "Suffolk County" "Worcester County"
# Geometries:
the_data$geometry[1]
## Geometry set for 1 feature
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -72.56997 ymin: 42.6548 xmax: -72.4895 ymax: 42.72995
## Geodetic CRS: NAD83
## MULTIPOLYGON (((-72.56997 42.72995, -72.56587 4...
# ##############################################################################
# Figure of total population:
the_data %>%
filter(variable=="B01003_001") %>%
ggplot(aes(fill=estimate)) +
geom_sf(color=NA) +
coord_sf(crs=4326, expand=FALSE) +
# coord_sf(crs=26911, expand=FALSE) +
# coord_sf(crs=26911, expand=FALSE, xlim=c(-74, -70), ylim=c(42, 44)) +
scale_fill_viridis_c(option = "magma", na.value="white") +
ggtitle("Total Population")

# Figure/html of total population:
total_pop_map <- the_data %>%
filter(variable=="B01003_001") %>%
rename(`Total Population` = estimate) %>%
mapView(zcol="Total Population", na.color="white")
total_pop_map
mapshot(total_pop_map, url="total_popmap.html")
# Figure/html of no internet:
no_internet_map <- data_against_summary %>%
filter(variable=="B28002_013") %>%
rename(`No Internet` = percent) %>%
mapView(zcol="No Internet", na.color="white")
no_internet_map
mapshot(no_internet_map, url="nointernet_map.html")
# Figure/html of broadband subscription:
broadband_sub_map <- data_against_summary %>%
filter(variable=="B28002_004") %>%
rename(`Broadband Subscription` = percent) %>%
mapView(zcol="Broadband Subscription", na.color="white")
broadband_sub_map
mapshot(broadband_sub_map, url="broadband_subscript_map.html")
# Figure/html of broadband subscription fiberoptic/cable/dsl:
cable_sub_map <- data_against_summary %>%
filter(variable=="B28002_007") %>%
rename(`Cable/Fiberoptic/DSL` = percent) %>%
mapView(zcol="Cable/Fiberoptic/DSL", na.color="white")
cable_sub_map
mapshot(cable_sub_map, url="broadband_CableFiberOpticDSL_map.html")
# # ##############################################################################
# # For cable: B28002_007E
# data_against_summary %>%
# filter(variable=="B28002_007") %>%
# ggplot(aes(fill=percent)) +
# geom_sf(color=NA) +
# coord_sf(crs=26911) +
# scale_fill_viridis_c(option = "magma", na.value="white") +
# ggtitle("With Cable/Fiber optic/DSL Broadband subscription")
# # For only the county:
# data_against_summary_suffolk %>%
# filter(variable=="B28002_007") %>%
# ggplot(aes(fill=percent)) +
# geom_sf(color=NA) +
# coord_sf(crs=26911) +
# scale_fill_viridis_c(option = "magma", na.value="white") +
# ggtitle("With Cable/Fiber optic/DSL Broadband subscription, Suffolk County")
# # ###############################################################################
# # No access to internet: B28002_013
# data_against_summary_suffolk %>%
# filter(variable=="B28002_013") %>%
# ggplot(aes(fill=percent)) +
# geom_sf(color=NA) +
# coord_sf(crs=26911) +
# scale_fill_viridis_c(option = "magma", na.value="white") +
# ggtitle("Without Access to Internet, Suffolk County")
# # No access to internet: B28002_013
# data_against_summary %>%
# filter(variable=="B28002_013") %>%
# ggplot(aes(fill=percent)) +
# geom_sf(color=NA) +
# coord_sf(crs=26911) +
# scale_fill_viridis_c(option = "magma", na.value="white") +
# ggtitle("Without Access to Internet")
# # All internet variables:
# data_against_summary2 %>%
# filter(variable!="B01003_001") %>%
# ggplot(aes(fill=percent)) +
# geom_sf(color=NA) +
# coord_sf(crs=26911) +
# scale_fill_viridis_c(option = "magma", na.value="white") +
# facet_wrap(~variable, ncol=3)
# Plot internet variables for MA:
# Image list:
ma_images <- list()
# Loop to generate images:
for (the_var in internet_variables_noE){
# Get variable description:
var_des <- variable_descriptions$label[variable_descriptions$name==the_var]
cat("For variable: ", the_var, ": ", var_des, "\n")
# Make plot:
ma_images[[the_var]] <- data_against_summary %>%
filter(variable==the_var) %>%
ggplot(aes(fill=percent)) +
geom_sf(color=NA) +
coord_sf(crs=4326) +
scale_fill_viridis_c(option = "magma", na.value="white") +
ggtitle(paste("Massachusetts\n", var_des, " \n(", the_var, ")")) +
theme(plot.title=element_text(size=12),
plot.margin=unit(c(0.25, 0.25, 0.25, 0.25), "in"))
# Plot image:
print(ma_images[[the_var]])
}
# # Image list:
# suffolk_images <- list()
#
# # Loop to generate images:
# for (the_var in internet_variables_noE){
# # Get variable description:
# var_des <- variable_descriptions$label[variable_descriptions$name==the_var]
# # Make plot:
# suffolk_images[[the_var]] <- data_against_summary_suffolk %>%
# filter(variable==the_var) %>%
# ggplot(aes(fill=percent)) +
# geom_sf(color=NA) +
# coord_sf(crs=26911) +
# scale_fill_viridis_c(option = "magma", na.value="white") +
# ggtitle(paste("Suffolk County\n", var_des, " \n(", the_var, ")")) +
# theme(plot.title=element_text(size=12),
# plot.margin=unit(c(0.25, 0.25, 0.25, 0.25), "in"))
# # Plot image:
# print(suffolk_images[[the_var]])
# }
# Do for all counties:
# Initialize list to store images:
county_images = list()
for (county in county_vector){
cat("For county: ", county, "\n")
# Get county data:
county_data <- get_acs(geography="block group", year=2019, state="MA", county=county,
variables=all_vars, geometry=TRUE, summary_var="B01003_001")
# Divide estimates by summary:
county_data$percent <- (county_data$estimate / county_data$summary_est) * 100
# Initalize list to store images:
county_images[[county]] = list()
# Loop to generate images:
for (the_var in internet_variables_noE){
var_des <- variable_descriptions$label[variable_descriptions$name==the_var]
cat("For variable: ", the_var, ": ", var_des, "\n")
# Make plot:
county_images[[county]][[the_var]] <- county_data %>%
filter(variable==the_var) %>%
ggplot(aes(fill=percent)) +
geom_sf(color=NA) +
coord_sf(crs=4326) +
scale_fill_viridis_c(option = "magma", na.value="white") +
ggtitle(paste(county, "\n", var_des, " \n(", the_var, ")")) +
theme(plot.title=element_text(size=12),
plot.margin=unit(c(0.25, 0.25, 0.25, 0.25), "in"))
# Plot image:
print(county_images[[county]][[the_var]])
}
}
# Generate R citations
citation("tidyverse")
##
## Wickham et al., (2019). Welcome to the tidyverse. Journal of Open
## Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {Welcome to the {tidyverse}},
## author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
## year = {2019},
## journal = {Journal of Open Source Software},
## volume = {4},
## number = {43},
## pages = {1686},
## doi = {10.21105/joss.01686},
## }
citation("tidycensus")
##
## To cite package 'tidycensus' in publications use:
##
## Kyle Walker and Matt Herman (2021). tidycensus: Load US Census
## Boundary and Attribute Data as 'tidyverse' and 'sf'-Ready Data
## Frames. R package version 1.1.
## https://CRAN.R-project.org/package=tidycensus
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {tidycensus: Load US Census Boundary and Attribute Data as 'tidyverse' and
## 'sf'-Ready Data Frames},
## author = {Kyle Walker and Matt Herman},
## year = {2021},
## note = {R package version 1.1},
## url = {https://CRAN.R-project.org/package=tidycensus},
## }
citation("mapview")
##
## To cite package 'mapview' in publications use:
##
## Tim Appelhans, Florian Detsch, Christoph Reudenbach and Stefan
## Woellauer (2021). mapview: Interactive Viewing of Spatial Data in R.
## R package version 2.10.0. https://CRAN.R-project.org/package=mapview
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {mapview: Interactive Viewing of Spatial Data in R},
## author = {Tim Appelhans and Florian Detsch and Christoph Reudenbach and Stefan Woellauer},
## year = {2021},
## note = {R package version 2.10.0},
## url = {https://CRAN.R-project.org/package=mapview},
## }
citation("leaflet")
##
## To cite package 'leaflet' in publications use:
##
## Joe Cheng, Bhaskar Karambelkar and Yihui Xie (2021). leaflet: Create
## Interactive Web Maps with the JavaScript 'Leaflet' Library. R package
## version 2.0.4.1. https://CRAN.R-project.org/package=leaflet
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {leaflet: Create Interactive Web Maps with the JavaScript 'Leaflet'
## Library},
## author = {Joe Cheng and Bhaskar Karambelkar and Yihui Xie},
## year = {2021},
## note = {R package version 2.0.4.1},
## url = {https://CRAN.R-project.org/package=leaflet},
## }